library(mosaic)
library(tidyverse)
library(lubridate)
library(DataComputing)
library(rvest)
library(broom)
How do the indicators used to gauge how developed a country is (such as birth rate, population, and death rate) relate to the spread of COVID-19? Essentially, how does the impact of COVID-19 differ from country to country?
Reading in the Data:
Data Source 1: COVID
# Import the per-million COVID case/death table from the working directory.
# NOTE(review): the column names reported for this import (the file title,
# then X, X.1, ...) suggest the CSV's first line is a title rather than the
# header row -- confirm. Reading with `skip = 1` would give real headers, but
# the later renames would then need updating to match.
COVID <- read.csv("total-covid-cases-deaths-per-million.csv")

# Print the raw import, then report how many rows were read.
COVID
nrow(COVID)
[1] 9487
# List the column names of the raw COVID import.
names(COVID)
[1] "total.covid.cases.deaths.per.million" "X"
[3] "X.1" "X.2"
[5] "X.3" "X.4"
[7] "X.5" "X.6"
[9] "X.7" "X.8"
[11] "X.9" "X.10"
[13] "X.11" "X.12"
[15] "X.13" "X.14"
[17] "X.15" "X.16"
[19] "X.17" "X.18"
[21] "X.19" "X.20"
[23] "X.21" "X.22"
[25] "X.23" "X.24"
[27] "X.25" "X.26"
[29] "X.27" "X.28"
[31] "X.29" "X.30"
[33] "X.31" "X.32"
[35] "X.33" "X.34"
[37] "X.35" "X.36"
[39] "X.37" "X.38"
[41] "X.39" "X.40"
[43] "X.41" "X.42"
[45] "X.43" "X.44"
[47] "X.45" "X.46"
[49] "X.47" "X.48"
[51] "X.49" "X.50"
[53] "X.51" "X.52"
[55] "X.53" "X.54"
[57] "X.55" "X.56"
[59] "X.57" "X.58"
[61] "X.59" "X.60"
[63] "X.61" "X.62"
[65] "X.63" "X.64"
[67] "X.65" "X.66"
[69] "X.67" "X.68"
[71] "X.69" "X.70"
[73] "X.71" "X.72"
[75] "X.73" "X.74"
[77] "X.75" "X.76"
[79] "X.77" "X.78"
[81] "X.79" "X.80"
[83] "X.81" "X.82"
[85] "X.83" "X.84"
[87] "X.85" "X.86"
[89] "X.87" "X.88"
[91] "X.89" "X.90"
[93] "X.91" "X.92"
[95] "X.93" "X.94"
[97] "X.95" "X.96"
[99] "X.97" "X.98"
[101] "X.99" "X.100"
[103] "X.101" "X.102"
[105] "X.103" "X.104"
[107] "X.105" "X.106"
[109] "X.107" "X.108"
[111] "X.109" "X.110"
[113] "X.111" "X.112"
[115] "X.113" "X.114"
[117] "X.115" "X.116"
[119] "X.117" "X.118"
[121] "X.119" "X.120"
[123] "X.121" "X.122"
[125] "X.123" "X.124"
[127] "X.125" "X.126"
[129] "X.127" "X.128"
[131] "X.129" "X.130"
[133] "X.131" "X.132"
[135] "X.133" "X.134"
[137] "X.135" "X.136"
[139] "X.137" "X.138"
[141] "X.139" "X.140"
[143] "X.141" "X.142"
[145] "X.143" "X.144"
[147] "X.145" "X.146"
[149] "X.147" "X.148"
[151] "X.149" "X.150"
[153] "X.151" "X.152"
[155] "X.153" "X.154"
[157] "X.155" "X.156"
[159] "X.157" "X.158"
[161] "X.159" "X.160"
[163] "X.161" "X.162"
[165] "X.163" "X.164"
[167] "X.165" "X.166"
[169] "X.167" "X.168"
[171] "X.169" "X.170"
[173] "X.171" "X.172"
[175] "X.173" "X.174"
[177] "X.175" "X.176"
[179] "X.177" "X.178"
[181] "X.179" "X.180"
[183] "X.181" "X.182"
[185] "X.183" "X.184"
[187] "X.185" "X.186"
[189] "X.187" "X.188"
[191] "X.189" "X.190"
[193] "X.191" "X.192"
[195] "X.193" "X.194"
[197] "X.195" "X.196"
[199] "X.197" "X.198"
[201] "X.199" "X.200"
[203] "X.201" "X.202"
[205] "X.203" "X.204"
[207] "X.205" "X.206"
[209] "X.207" "X.208"
[211] "X.209" "X.210"
[213] "X.211" "X.212"
[215] "X.213" "X.214"
[217] "X.215" "X.216"
[219] "X.217" "X.218"
[221] "X.219" "X.220"
[223] "X.221" "X.222"
[225] "X.223" "X.224"
[227] "X.225" "X.226"
[229] "X.227" "X.228"
[231] "X.229" "X.230"
[233] "X.231" "X.232"
[235] "X.233" "X.234"
[237] "X.235" "X.236"
[239] "X.237" "X.238"
[241] "X.239" "X.240"
[243] "X.241" "X.242"
[245] "X.243" "X.244"
[247] "X.245" "X.246"
[249] "X.247" "X.248"
[251] "X.249" "X.250"
[253] "X.251" "X.252"
[255] "X.253" "X.254"
# Preview the first rows of the raw COVID import.
head(COVID)
Data Source 2: CountryData
# CountryData is not created in this section (presumably the dataset that
# ships with the DataComputing package -- confirm).
# Print it, then report its row count.
CountryData
nrow(CountryData)
[1] 256
# List the variables available in CountryData.
names(CountryData)
[1] "country" "area" "pop" "growth" "birth"
[6] "death" "migr" "maternal" "infant" "life"
[11] "fert" "health" "HIVrate" "HIVpeople" "HIVdeath"
[16] "obesity" "underweight" "educ" "unemploymentYouth" "GDP"
[21] "GDPgrowth" "GDPcapita" "saving" "indProd" "labor"
[26] "unemployment" "family" "tax" "budget" "debt"
[31] "inflation" "discount" "lending" "narrow" "broad"
[36] "credit" "shares" "balance" "exports" "imports"
[41] "gold" "externalDebt" "homeStock" "abroadStock" "elecProd"
[46] "elecCons" "elecExp" "elecImp" "elecCap" "elecFossil"
[51] "elecNuc" "elecHydro" "elecRenew" "oilProd" "oilExp"
[56] "oilImp" "oilRes" "petroProd" "petroCons" "petroExp"
[61] "petroImp" "gasProd" "gasCons" "gasExp" "gasImp"
[66] "gasRes" "mainlines" "cell" "netHosts" "netUsers"
[71] "airports" "railways" "roadways" "waterways" "marine"
[76] "military"
# Preview the first rows of CountryData.
head(CountryData)
Data Source 3: Continents
# Import the country/continent lookup table from the working directory.
# NOTE(review): read.csv treats the literal string "NA" as missing by default.
# If the Continent column stores two-letter codes, a North America code of
# "NA" would be read as a missing value -- confirm against the file.
Continents <- read.csv("countries and continents.csv")

# Print the raw import, then report how many rows were read.
Continents
nrow(Continents)
[1] 251
# List the column names of the continent lookup table.
names(Continents)
[1] "name" "official_name_en" "official_name_fr"
[4] "ISO3166.1.Alpha.2" "ISO3166.1.Alpha.3" "M49"
[7] "ITU" "MARC" "WMO"
[10] "DS" "Dial" "FIFA"
[13] "FIPS" "GAUL" "IOC"
[16] "ISO4217.currency_alphabetic_code" "ISO4217.currency_country_name" "ISO4217.currency_minor_unit"
[19] "ISO4217.currency_name" "ISO4217.currency_numeric_code" "is_independent"
[22] "Capital" "Continent" "TLD"
[25] "Languages" "Geoname.ID" "EDGAR"
# Preview the first rows of the continent lookup table.
head(Continents)
# Show the raw import for reference before reshaping it.
COVID

# Build a tidy table with one row per country and date from the raw import.
#
# The first five imported columns are given meaningful names, the first row is
# dropped (presumably it holds the file's real header text -- confirm against
# the CSV), and every remaining column is discarded.
TidyCOVID <- COVID %>%
  rename(
    country = total.covid.cases.deaths.per.million,
    Code = X,
    Date = X.1,
    DeathsPerMillion = X.2,
    CasesPerMillion = X.3
  ) %>%
  filter(row_number() > 1) %>%
  select(country, Code, Date, DeathsPerMillion, CasesPerMillion) %>%
  mutate(
    country = as.character(country),
    Code = as.character(Code),
    Date = mdy(as.character(Date)),
    # Convert via character so the values written in the file are kept.
    # Calling as.integer() directly on a factor returns its internal level
    # codes (and on text it truncates decimals), so the previous
    # `as.integer(x) - 1` did not reproduce the per-million figures.
    # Blank cells become NA.
    DeathsPerMillion = as.numeric(as.character(DeathsPerMillion)),
    CasesPerMillion = as.numeric(as.character(CasesPerMillion))
  )
TidyCOVID
TidyCOVID
# Per-million deaths and cases recorded on 1 February 2020.
February1 <- TidyCOVID %>%
  filter(Date == "2020-02-01") %>%
  select(
    country,
    DeathsPerMillionFeb1 = DeathsPerMillion,
    CasesPerMillionFeb1 = CasesPerMillion
  )

# Per-million deaths and cases recorded on 29 February 2020.
February29 <- TidyCOVID %>%
  filter(Date == "2020-02-29") %>%
  select(
    country,
    DeathsPerMillionFeb29 = DeathsPerMillion,
    CasesPerMillionFeb29 = CasesPerMillion
  )

# Change over February: end-of-month value minus start-of-month value.
# full_join keeps countries that appear in only one of the two snapshots.
FebGrow <- February1 %>%
  full_join(February29, by = "country") %>%
  mutate(
    CasesPerMillionFebGrowth = CasesPerMillionFeb29 - CasesPerMillionFeb1,
    DeathsPerMillionFebGrowth = DeathsPerMillionFeb29 - DeathsPerMillionFeb1
  )
# Per-million deaths and cases recorded on 1 March 2020.
March1 <- TidyCOVID %>%
  filter(Date == "2020-03-01") %>%
  select(
    country,
    DeathsPerMillionMarch1 = DeathsPerMillion,
    CasesPerMillionMarch1 = CasesPerMillion
  )

# Per-million deaths and cases recorded on 31 March 2020.
March31 <- TidyCOVID %>%
  filter(Date == "2020-03-31") %>%
  select(
    country,
    DeathsPerMillionMarch31 = DeathsPerMillion,
    CasesPerMillionMarch31 = CasesPerMillion
  )

# Change over March: end-of-month value minus start-of-month value.
# full_join keeps countries that appear in only one of the two snapshots.
MarchGrow <- March1 %>%
  full_join(March31, by = "country") %>%
  mutate(
    CasesPerMillionMarchGrowth =
      CasesPerMillionMarch31 - CasesPerMillionMarch1,
    DeathsPerMillionMarchGrowth =
      DeathsPerMillionMarch31 - DeathsPerMillionMarch1
  )
# Per-million deaths and cases recorded on 31 December 2019.
December31 <- TidyCOVID %>%
  filter(Date == "2019-12-31") %>%
  select(
    country,
    DeathsPerMillionDecember31 = DeathsPerMillion,
    CasesPerMillionDecember31 = CasesPerMillion
  )

# Per-million deaths and cases recorded on 5 April 2020.
April5 <- TidyCOVID %>%
  filter(Date == "2020-04-05") %>%
  select(
    country,
    DeathsPerMillionApril5 = DeathsPerMillion,
    CasesPerMillionApril5 = CasesPerMillion
  )

# Change across the whole window: 5 April value minus 31 December value.
# full_join keeps countries that appear in only one of the two snapshots.
TotalGrow <- December31 %>%
  full_join(April5, by = "country") %>%
  mutate(
    CasesPerMillionTotalGrowth =
      CasesPerMillionApril5 - CasesPerMillionDecember31,
    DeathsPerMillionTotalGrowth =
      DeathsPerMillionApril5 - DeathsPerMillionDecember31
  )
# Keep only the country key and the two growth columns of each period table,
# dropping the start/end snapshot columns used to compute them.
FebGrow <- select(
  FebGrow,
  country, CasesPerMillionFebGrowth, DeathsPerMillionFebGrowth
)
MarchGrow <- select(
  MarchGrow,
  country, CasesPerMillionMarchGrowth, DeathsPerMillionMarchGrowth
)
TotalGrow <- select(
  TotalGrow,
  country, CasesPerMillionTotalGrowth, DeathsPerMillionTotalGrowth
)

# Combine the three periods into one table. The joins rely on the shared
# `country` column (dplyr reports the key it picked), and left joins keep
# only the countries present in the February table.
Growth <- FebGrow %>%
  left_join(MarchGrow) %>%
  left_join(TotalGrow)
Joining, by = "country"
Joining, by = "country"
# Inspect the combined growth table and the raw continent lookup.
Growth
Continents

# Reduce the lookup to a two-column country -> continent table: skip the
# first two rows, then keep `name` (renamed to `country`) and `Continent`,
# both stored as plain character vectors.
TidyCont <- Continents %>%
  slice(-(1:2)) %>%
  transmute(
    country = as.character(name),
    Continent = as.character(Continent)
  )
TidyCont

# Attach the continent to every daily COVID record, then add the CountryData
# indicators. Both are left joins on `country`, so every COVID row is kept.
COVIDwCont <- TidyCOVID %>%
  left_join(TidyCont, by = "country")
DataMix <- COVIDwCont %>%
  left_join(CountryData, by = "country")
JOEY: I think this is a good plot to start with. If you can make it clearer (for example, with color coding), that would be great. It gives us a solid first look at our data.
# Cases per million over time, one panel per continent.
DataMix %>%
  ggplot(aes(x = Date, y = CasesPerMillion)) +
  geom_point() +
  facet_wrap(~Continent)
JOEY: I like this graph because it visualizes one of the factors that contributes to the development classification. Can we add color coding, or something similar, to represent one more factor, such as different countries or regions of the world? The more we can do with this one, the better.
# Deaths per million over time, one panel per continent.
DataMix %>%
  ggplot(aes(x = Date, y = DeathsPerMillion)) +
  geom_point() +
  facet_wrap(~Continent)
# Attach the CountryData indicators to the per-country growth figures.
GrowthMix <- Growth %>%
  left_join(CountryData, by = "country")

# Total case growth against number of airports; the x axis is limited to
# 0-5000, so points outside that range are not drawn.
GrowthMix %>%
  ggplot(aes(x = airports, y = CasesPerMillionTotalGrowth)) +
  geom_point() +
  xlim(0, 5000)

# Total case growth against the `health` indicator.
GrowthMix %>%
  ggplot(aes(x = health, y = CasesPerMillionTotalGrowth)) +
  geom_point()

# Total death growth against the `health` indicator.
GrowthMix %>%
  ggplot(aes(x = health, y = DeathsPerMillionTotalGrowth)) +
  geom_point()

# Rank the rows from largest to smallest total case growth.
arrange(GrowthMix, desc(CasesPerMillionTotalGrowth))
# Convert per-million growth into estimated absolute counts per country.
#
# Keeps the country name, the two total-growth columns and `pop`, removes the
# continent-level and world aggregate rows, and scales each per-million figure
# by the population expressed in millions.
ShowChange <- GrowthMix %>%
  select(
    country,
    CasesPerMillionTotalGrowth,
    DeathsPerMillionTotalGrowth,
    pop
  ) %>%
  filter(
    !country %in% c(
      "Africa", "Asia", "Europe", "North America",
      "Oceania", "South America", "World"
    )
  ) %>%
  mutate(
    # Use the unrounded population. Rounding pop / 1e6 to a whole number
    # first turned every country with fewer than 500,000 people into a
    # multiplier of 0 and shifted every other country by up to half a
    # million people.
    TotalGrowth = CasesPerMillionTotalGrowth * pop / 1e6,
    TotalDeath = DeathsPerMillionTotalGrowth * pop / 1e6
  )

# Rank countries from largest to smallest estimated death count.
ShowChange %>%
  arrange(desc(TotalDeath))

# Map of countries shaded by estimated total case growth.
WorldMap(ShowChange, key = country, fill = TotalGrowth)
Mapping API still under development and may change in future releases.